{-
  Generate unicode tables used in shims, src/string.js
  using the same encoding as the metadata and static data
  and a simple RLE scheme that compresses sequences with the
  same value and also alternating sequences
 -}
module Main where

import Data.List.Split
import Data.Bits
import Data.Char
import Data.List (group)
import Gen2.Compactor

maxEnc = 737187

longEnc n = [n `shiftR` 16, n .&. 0xffff]

-- use one bit for more efficient encoding of stretches of length one
combineRanges :: [Int] -> [Int]
combineRanges xs | n > 1 = 0 : n : combineRanges (drop (2*n) xs) -- alternating pattern, zero
  where n = (length $ takeWhile (==1) xs) `div` 2
combineRanges (1:x:xs)
  | 2*x+2 > maxEnc = 1 : longEnc x ++ combineRanges xs -- length 1, odd number
  | otherwise      = 2*x+1          : combineRanges xs
combineRanges (x:xs)
  | 2*x+1 > maxEnc = 2 : longEnc x ++ combineRanges xs -- other length, even number
  | otherwise      = 2*x+2          : combineRanges xs
combineRanges []   = []

{-
   encode ranges, starts at -1, the first range is always False,
 -}
encodeRLE :: [Bool] -> String
encodeRLE xs = encodeStr . combineRanges . map length . dropTrailingFalse . group $ False:xs
  where
    dropTrailingFalse ys
      | not (head $ last ys) = init ys
      | otherwise            = ys

combineMappings :: [(Int,Int)] -> [Int]
combineMappings xs@((1,v1):(1,v2):_)                                       -- alternating pattern, zero
  | n >= 1 = [0,n,v1,v2] ++ combineMappings (drop (2*n) xs)
    where n = length (takeWhile (==[(1,v1),(1,v2)]) (chunksOf 2 xs))
combineMappings ((1,v):xs)
  | 2*v+1 <= maxEnc        = [2*v+1] ++ combineMappings xs                 -- length 1 combined with value: odd number
combineMappings ((n,v):xs)
  | 2*n+2 > maxEnc         = 2 : longEnc n ++ [v] ++ combineMappings xs    -- other length: even number
  | otherwise              = [2*n+2,v]            ++ combineMappings xs
combineMappings []         = []

{-
  encode mapping, starts at 0
 -}
encodeMapping :: [Int] -> String
encodeMapping xs = encodeStr . combineMappings $ map (\ys -> (length ys, head ys)) (group xs)

-- we can assume that the encoder does not generate characters that have to be
-- escaped or are outside ASCII range as long as it's double quoted
assignDat :: String -> String -> String
assignDat var val = "var h$" ++ var ++ " = \"" ++ val ++ "\";"

mkRanges :: (Char -> Bool) -> String
mkRanges p = encodeRLE $ map p listChars

toAbs :: Int -> Int
toAbs x | x < 0     = 2 * (abs x) - 1
        | otherwise = 2 * x

-- skip unassigned planes 3-13
listChars = map toEnum ([0..0x2FFFF] ++ [0xE0000..0x10FFFF])

-- must map to a nonnegative int
mkMapping :: (Char -> Int) -> String
mkMapping f = encodeMapping $ map f listChars

-- map the biggest categories to zero. PrivateUse is fixed and
-- the ranges are hardcoded in string.js
catNum :: GeneralCategory -> Int
catNum PrivateUse  = 0
catNum NotAssigned = 0
catNum x           = fromEnum x + 1

main = putStrLn . unlines $
         "// Unicode tables generated by ghcjs/utils/genUnicode.hs" :
         map (\(v,p) -> assignDat v (mkRanges p))
           [ ("printRanges", isPrint), ("alnumRanges", isAlphaNum)
           , ("lowerRanges", isLower), ("upperRanges", isUpper)
           , ("alphaRanges", isAlpha)] ++
         map (\(v,m) -> assignDat v (mkMapping m))
           [ ("toLowerMapping", (\c -> toAbs (ord (toLower c) - (ord c))))
           , ("toUpperMapping", (\c -> toAbs (ord (toUpper c) - (ord c))))
           , ("toTitleMapping", (\c -> toAbs (ord (toTitle c) - (ord c))))
           , ("catMapping", catNum . generalCategory)
           ]
